dir.create("SIPP_eventstudy")


sipp<-read.dta13("sipp_combined.dta")
sipp<-as.data.table(sipp)
sipp<-sipp[!is.na(year_limit),]
gc()

sipp[,ssuid:=interaction(ssuid,spanel)]
sipp[,workcheck:=max(ehrsall>=30 & time_limit < 0, na.rm=TRUE),by=.(ssuid,spanel,epppnum)]
sipp[,everssialone:=max(essiself==1 & essself!=1, na.rm=TRUE),by=.(ssuid,spanel,epppnum)]
sipp[,alreadydi:=max((essself==1 | essiself == 1) & time_limit <= -1, na.rm=TRUE),by=.(ssuid,spanel,epppnum)]
sipp[,post := time_limit >= 0]
sipp[ems%in%c("Married, spouse present","Never Married"),married := ems == "Married, spouse present"]
sipp[etenure%in%c("Owned or being bought by ... or","Rented"),ownhome := etenure == "Owned or being bought by ... or"]
sipp[,prewealth:=max(thhtnw - 9e90*(time_limit>=0),na.rm=TRUE),by=.(spanel,ssuid,epppnum)]
sipp[prewealth<=-9e9,prewealth:=NA]

#If two closest values of married exist, take the one before the shock:
sipp[,married:=married[abs(time_limit) == min(abs(time_limit),na.rm=TRUE)][1],by=.(epppnum,ssuid,spanel)]
sipp[,ownhome:=ownhome[abs(time_limit) == min(abs(time_limit),na.rm=TRUE)][1],by=.(epppnum,ssuid,spanel)]
sipp[,marhome:=interaction(married,ownhome)]
sipp[,ownbiz:=max(ebiznow1=="Yes",ebiznow2=="Yes",na.rm=TRUE),by=.(epppnum,ssuid,spanel)]


sipp[,isappwave:=max(!is.na(eapplyss)),by=.(spanel,swave,srefmon)]
sipp[isappwave==1&!is.na(essself),applyorDI:=eapplyss==1 | essself==1]

sipp[,emp_full:=ehrsall>=30]
sipp[,tpearn_real:=tpearn_real*12]
sipp[,sp_tpearn_real:=sp_tpearn_real*12]





sipp[,functional:=NA]
sipp[,functional_count:=0]
for(vvar in c("ecane", "ewchair", "ehearaid", "eseedif", "eheardif", "ehearnot", "espeechd", "edif10", "epushd", "estandd", "esitd", "estoopd", "ereachd", "egraspd", "estairsd", "ewalkd", "eteled", "eindif", "eoutdif", "ebeddif", "ebathdif", "edressd", "ewalk2d", "eeatdif", "etoiletd", "emoneyd", "emealsd", "ehworkd", "emedd")){
  sipp[!get(vvar)%in%c("Not in Universe","No"),functional:=1]
  sipp[is.na(functional) & get(vvar)%in%c("No"),functional:=0]
  sipp[!get(vvar)%in%c("Not in Universe","No"),functional_count:=functional_count+1]
  sipp[get(vvar)=="Not in Universe",eval(vvar):=NA]
#  sipp[,eval(vvar):=!get(vvar)%in%c("Not in Universe","No")]
}
sipp[is.na(functional),functional_count:=NA]
sipp[(!econd1%in%c("Not in Universe") & !is.na(econd1)) |
       (!econd2%in%c("Not in Universe") & !is.na(econd2)) |
       (!econd3%in%c("Not in Universe") & !is.na(econd3)),mental:=econd1%in%c("Mental or emotional problem or") |
       econd2%in%c("Mental or emotional problem or") |
       econd3%in%c("Mental or emotional problem or")]
sipp[(!econd1%in%c("Not in Universe") & !is.na(econd1)) |
       (!econd2%in%c("Not in Universe") & !is.na(econd2)) |
       (!econd3%in%c("Not in Universe") & !is.na(econd3)),congenital:=econd1%in%c("Learning disability","Mental retardation",
                                                                                  "Epilepsy","Cerebral palsy") |
       econd2%in%c("Learning disability","Mental retardation",
                   "Epilepsy","Cerebral palsy") |
       econd3%in%c("Learning disability","Mental retardation",
                   "Epilepsy","Cerebral palsy")]
sipp[(!econd1%in%c("Not in Universe") & !is.na(econd1)) |
       (!econd2%in%c("Not in Universe") & !is.na(econd2)) |
       (!econd3%in%c("Not in Universe") & !is.na(econd3)),musculoskeletal:=econd1%in%c("Arthritis or rheumatism","Head or spinal cord injury",
                                                                                       "Stiffness or deformity of the",
                                                                                       "Back or spine problems (including") |
       econd2%in%c("Arthritis or rheumatism","Head or spinal cord injury",
                   "Stiffness or deformity of the",
                   "Back or spine problems (including") |
       econd3%in%c("Arthritis or rheumatism","Head or spinal cord injury",
                   "Stiffness or deformity of the",
                   "Back or spine problems (including")]
sipp[(!econd1%in%c("Not in Universe") & !is.na(econd1)) |
       (!econd2%in%c("Not in Universe") & !is.na(econd2)) |
       (!econd3%in%c("Not in Universe") & !is.na(econd3)),diagnostic:=econd1%in%c(#"AIDS or AIDS Related Condition",
         "Cancer",
         "Heart trouble",
         "Lung or respiratory problems",
         "Stroke") |
       econd2%in%c(#"AIDS or AIDS Related Condition",
         "Cancer",
         "Heart trouble",
         "Lung or respiratory problems",
         "Stroke") |
       econd3%in%c(#"AIDS or AIDS Related Condition",
         "Cancer",
         "Heart trouble",
         "Lung or respiratory problems",
         "Stroke")]

sipp[functional==0 & is.na(mental),mental := 0]
sipp[functional==0 & is.na(congenital),congenital := 0]
sipp[functional==0 & is.na(musculoskeletal),musculoskeletal := 0]
sipp[functional==0 & is.na(diagnostic),diagnostic := 0]


sipp[,eeducate:=max(eeducate),by=.(epppnum,ssuid,spanel)]
sipp[,esex:=esex[1],by=.(epppnum,ssuid,spanel)]
sipp[,age_limit:= tage - time_limit]
sipp[,agebin:=floor(age_limit/10)*10]

sipp[epdjbthn=="Yes",tlstwrky:=rhcalyr]
sipp[,tlstwrky:=min(tlstwrky,na.rm=TRUE),by=.(epppnum,ssuid,spanel)]
sipp[is.infinite(tlstwrky),tlstwrky:=NA]

sipp[epdjbthn=="Yes",tmakmnyr:=rhcalyr]
sipp[,tmakmnyr:=min(tmakmnyr,na.rm=TRUE),by=.(epppnum,ssuid,spanel)]
sipp[is.infinite(tmakmnyr) | tmakmnyr == 0 | tmakmnyr==-1,tmakmnyr:=NA]


sipp[essiself == 1 & (is.na(tssistry) | tssistry == -1 | rhcalyr <= tssistry),tssistry := rhcalyr]
sipp[tssistry==-1,tssistry:=99999]
sipp[,tssistry:=min(tssistry,na.rm=TRUE),by=.(epppnum,ssuid,spanel)]
sipp[tssistry==99999,tssistry:=-1]

sipp[tagess!= -1,ssdiyear:=tagess+tbyear + 1]
sipp[tagess == -1,ssdiyear:=99999]
sipp[,ssdiyear:=min(ssdiyear,na.rm=TRUE),by=.(epppnum,ssuid,spanel)]
sipp[essself == 1 & (ssdiyear==99999 | rhcalyr <= ssdiyear),ssdiyear := rhcalyr]
sipp[,ssdiyear:=min(ssdiyear,na.rm=TRUE),by=.(epppnum,ssuid,spanel)]
sipp[ssdiyear==99999,ssdiyear:=-1]


#Apps by health status:
sipp[,mean(eapplyss,na.rm=TRUE),by=.(edisabl,edisprev)]

print(
  ggplot(data=
           sipp[!is.na(year_limit) &
                  year_limit - year_prev >= -4 &
                  everssialone == 0 &
                  as.numeric(eeducate) <= 39 &
                  esex=="Male" &
                  !is.na(tmakmnyr) & tmakmnyr < year_limit & #note tlstwrky = - 1 means you were working in first month of survey.
                  (tssistry >= year_limit | tssistry == -1) &
                  (ssdiyear >= year_limit | ssdiyear == -1) &
                  ems %in% c("Married, spouse present","Never Married"),
                .(mean=mean(essself,na.rm=TRUE),
                  count=sum(!is.na(essself))),by=.(ems,time_limit)])+
    geom_line(aes(x=time_limit,y=mean,group=ems, color=ems))+
    theme(legend.position="bottom")+ labs(color="")
)

print(feols(as.formula(paste0("essself ~ ems")),
            data= sipp[!is.na(year_limit) &
                         post == 1 &
                         year_limit - year_prev >= -4 &
                         everssialone == 0 &
                         as.numeric(eeducate) <= 39 &
                         esex=="Male" &
                         tlstwrky!=0 & !is.na(tlstwrky) & tlstwrky < year_limit&
                         !is.na(tmakmnyr) & tmakmnyr < year_limit & #note tlstwrky = - 1 means you were working in first month of survey.
                         (tssistry >= year_limit | tssistry == -1) &
                         (ssdiyear >= year_limit | ssdiyear == -1)
                       #                   time_limit >= -2 & time_limit <= 4 &
                       ,],
            cluster="ssuid")
)


sipp[,mean(!is.na(year_prev))]
sipp[,mean(year_limit - year_prev >= -4 & age_limit <= 62,na.rm=TRUE)]
sipp[year_limit - year_prev >= -4 & age_limit <= 62,mean(!is.na(everssialone))]
sipp[year_limit - year_prev >= -4 & age_limit <= 62,mean(everssialone==0,na.rm=TRUE)]
sipp[year_limit - year_prev >= -4 & age_limit <= 62 & everssialone==0,mean(!is.na((eeducate)))]
sipp[year_limit - year_prev >= -4 & age_limit <= 62 & everssialone==0 &!is.na(eeducate),mean(as.numeric(eeducate)!=-1)]
sipp[year_limit - year_prev >= -4 & age_limit <= 62 & everssialone==0 &as.numeric(eeducate)!=-1,mean(!is.na(esex))]
sipp[year_limit - year_prev >= -4 & age_limit <= 62 & everssialone==0 &as.numeric(eeducate)!=-1,mean(esex=="Male",na.rm=TRUE)]
sipp[year_limit - year_prev >= -4 & age_limit <= 62 & everssialone==0 &as.numeric(eeducate)!=-1 & esex=="Male",mean(!is.na(tmakmnyr) & tmakmnyr < year_limit)]
sipp[year_limit - year_prev >= -4 & age_limit <= 62 & everssialone==0 &as.numeric(eeducate)!=-1 & esex=="Male"
     & !is.na(tmakmnyr) & tmakmnyr < year_limit ,mean((ssdiyear >= year_limit | ssdiyear == -1))]
sipp[year_limit - year_prev >= -4 & age_limit <= 62 & everssialone==0 &as.numeric(eeducate)!=-1 & esex=="Male"
     & !is.na(tmakmnyr) & tmakmnyr < year_limit & (ssdiyear >= year_limit | ssdiyear == -1) ,mean(eeducate<=39)]

#This is where I make my key sample restrictions:
sipp <- sipp[!is.na(year_limit) &
               #                    post == 1 &
               year_limit - year_prev >= -4 &
               age_limit <= 62& #Note: this restriction is baked into PSID and HRS samples.
               everssialone == 0 &
               as.numeric(eeducate)!=-1 &
               esex=="Male" &
               ownbiz==0 &
               !is.na(tmakmnyr) & tmakmnyr < year_limit & #note tlstwrky = - 1 means you were working in first month of survey.
               (tssistry >= year_limit | tssistry == -1) &
               (ssdiyear >= year_limit | ssdiyear == -1)
             ,]